#### 21-02-25

#install.packages(c("glmmTMB", "lme4"))

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   4.0.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lme4)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
library(performance)
library(summarytools)
## 
## Attaching package: 'summarytools'
## 
## The following object is masked from 'package:tibble':
## 
##     view
library(ggeffects)
library(marginaleffects)
library(brms)
## Loading required package: Rcpp
## Loading 'brms' package (version 2.22.10). Useful instructions
## can be found by typing help('brms'). A more detailed introduction
## to the package is available through vignette('brms_overview').
## 
## Attaching package: 'brms'
## 
## The following object is masked from 'package:lme4':
## 
##     ngrps
## 
## The following object is masked from 'package:stats':
## 
##     ar
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some
library(viridis)
## Loading required package: viridisLite
library(broom)
library(knitr)
library(dplyr)
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows
library(gt)
library(modelsummary)
library(sjPlot)
## 
## Attaching package: 'sjPlot'
## 
## The following object is masked from 'package:ggplot2':
## 
##     set_theme
library(glmmTMB)
## Warning in check_dep_version(dep_pkg = "TMB"): package version mismatch: 
## glmmTMB was built with TMB package version 1.9.17
## Current TMB package version is 1.9.18
## Please re-install glmmTMB from source or restore original 'TMB' package (see '?reinstalling' for more information)
## 
## Attaching package: 'glmmTMB'
## 
## The following object is masked from 'package:brms':
## 
##     lognormal
library(COMPoissonReg)
## Loading required package: numDeriv
library(Rcpp)
library(numDeriv)


complete_2023_2024 <- read.csv("input/2023_2024_all_moth_counts.csv")
#clean_complete - full summer complete moth counts available for 2023 and 2024


# Data Cleaning -----------------------------------------------------------


##To explore the distribution of your variables and count data like clean_complete
# quick visualizations
dfSummary(complete_2023_2024)
## Data Frame Summary  
## complete_2023_2024  
## Dimensions: 255 x 20  
## Duplicates: 0  
## 
## ------------------------------------------------------------------------------------------------------------------------
## No   Variable            Stats / Values                Freqs (% of Valid)    Graph                 Valid      Missing   
## ---- ------------------- ----------------------------- --------------------- --------------------- ---------- ----------
## 1    patch_name          1. Oka                        44 (17.3%)            III                   255        0         
##      [character]         2. Orford                     39 (15.3%)            III                   (100.0%)   (0.0%)    
##                          3. Rigaud                     30 (11.8%)            II                                         
##                          4. Mont_Saint_Hilaire         29 (11.4%)            II                                         
##                          5. Montebello                 22 ( 8.6%)            I                                          
##                          6. Notre_Dame_de_Bonsecours   18 ( 7.1%)            I                                          
##                          7. Mont_Royal                 13 ( 5.1%)            I                                          
##                          8. Mont_Saint_Bruno           12 ( 4.7%)                                                       
##                          9. Papineauville              12 ( 4.7%)                                                       
##                          10. Brownsburg                 6 ( 2.4%)                                                       
##                          [ 5 others ]                  30 (11.8%)            II                                         
## 
## 2    Years               Min  : 2023                   2023 : 102 (40.0%)    IIIIIIII              255        0         
##      [integer]           Mean : 2023.6                 2024 : 153 (60.0%)    IIIIIIIIIIII          (100.0%)   (0.0%)    
##                          Max  : 2024                                                                                    
## 
## 3    Year                Min  : 0                      0 : 102 (40.0%)       IIIIIIII              255        0         
##      [integer]           Mean : 0.6                    1 : 153 (60.0%)       IIIIIIIIIIII          (100.0%)   (0.0%)    
##                          Max  : 1                                                                                       
## 
## 4    stand_type          1. MOM                        10 ( 3.9%)                                  255        0         
##      [character]         2. Oak                        73 (28.6%)            IIIII                 (100.0%)   (0.0%)    
##                          3. Oak/Other                  33 (12.9%)            II                                         
##                          4. Oak/Pine                   41 (16.1%)            III                                        
##                          5. Other                      30 (11.8%)            II                                         
##                          6. Pine                       39 (15.3%)            III                                        
##                          7. Pine/Oak                   29 (11.4%)            II                                         
## 
## 5    landscape_type      1. agricultural               139 (54.5%)           IIIIIIIIII            255        0         
##      [character]         2. forest·                     85 (33.3%)           IIIIII                (100.0%)   (0.0%)    
##                          3. urban                       31 (12.2%)           II                                         
## 
## 6    stand_category      1. C                          35 (17.9%)            III                   196        59        
##      [character]         2. I                          29 (14.8%)            II                    (76.9%)    (23.1%)   
##                          3. E                          24 (12.2%)            II                                         
##                          4. H                          18 ( 9.2%)            I                                          
##                          5. F                          17 ( 8.7%)            I                                          
##                          6. L                          15 ( 7.7%)            I                                          
##                          7. A                          10 ( 5.1%)            I                                          
##                          8. D                          10 ( 5.1%)            I                                          
##                          9. K                          10 ( 5.1%)            I                                          
##                          10. MOM                       10 ( 5.1%)            I                                          
##                          [ 4 others ]                  18 ( 9.2%)            I                                          
## 
## 7    Percent_Oak         Mean (sd) : 0.3 (0.2)         0.00 : 48 (18.8%)     III                   255        0         
##      [numeric]           min < med < max:              0.10 : 22 ( 8.6%)     I                     (100.0%)   (0.0%)    
##                          0 < 0.3 < 0.8                 0.20 : 10 ( 3.9%)                                                
##                          IQR (CV) : 0.4 (0.7)          0.30 : 58 (22.7%)     IIII                                       
##                                                        0.40 : 50 (19.6%)     III                                        
##                                                        0.50 : 18 ( 7.1%)     I                                          
##                                                        0.60 : 18 ( 7.1%)     I                                          
##                                                        0.70 : 19 ( 7.5%)     I                                          
##                                                        0.80 : 12 ( 4.7%)                                                
## 
## 8    Percent_Pine        Mean (sd) : 0.2 (0.2)         0.00 : 117 (45.9%)    IIIIIIIII             255        0         
##      [numeric]           min < med < max:              0.10 :  29 (11.4%)    II                    (100.0%)   (0.0%)    
##                          0 < 0.1 < 0.8                 0.20 :  27 (10.6%)    II                                         
##                          IQR (CV) : 0.4 (1.2)          0.30 :  12 ( 4.7%)                                               
##                                                        0.40 :  21 ( 8.2%)    I                                          
##                                                        0.50 :  19 ( 7.5%)    I                                          
##                                                        0.60 :   9 ( 3.5%)                                               
##                                                        0.70 :  15 ( 5.9%)    I                                          
##                                                        0.80 :   6 ( 2.4%)                                               
## 
## 9    Percent_Maple       Mean (sd) : 0.3 (0.2)         10 distinct values    II                    246        9         
##      [numeric]           min < med < max:                                    III                   (96.5%)    (3.5%)    
##                          0 < 0.2 < 0.9                                       IIII                                       
##                          IQR (CV) : 0.3 (0.6)                                IIII                                       
##                                                                              IIII                                       
## 
## 10   Forest_Type         1. oak                        65 (25.5%)            IIIII                 255        0         
##      [character]         2. oak_maple                  48 (18.8%)            III                   (100.0%)   (0.0%)    
##                          3. pine                       45 (17.6%)            III                                        
##                          4. oak_pine                   21 ( 8.2%)            I                                          
##                          5. maple_oak                  16 ( 6.3%)            I                                          
##                          6. pine_oak                   14 ( 5.5%)            I                                          
##                          7. maple                      12 ( 4.7%)                                                       
##                          8. maple_birch                 7 ( 2.7%)                                                       
##                          9. maple_hardwood?             5 ( 2.0%)                                                       
##                          10. pine_hemlock               5 ( 2.0%)                                                       
##                          [ 9 others ]                  17 ( 6.7%)            I                                          
## 
## 11   stand_area_ha       Mean (sd) : 8.4 (3.5)         67 distinct values      . :                 255        0         
##      [numeric]           min < med < max:                                    : : :                 (100.0%)   (0.0%)    
##                          3.5 < 8 < 25.9                                      : : :                                      
##                          IQR (CV) : 3.2 (0.4)                                : : : :                                    
##                                                                              : : : : :   .                              
## 
## 12   forest_area_km2     Mean (sd) : 210.2 (436.8)     13 distinct values    :                     255        0         
##      [numeric]           min < med < max:                                    :                     (100.0%)   (0.0%)    
##                          1.6 < 14 < 1282                                     :                                          
##                          IQR (CV) : 95.7 (2.1)                               :                                          
##                                                                              :           :                              
## 
## 13   trap_name           1. BC Co-Dom1                   1 ( 0.4%)                                 255        0         
##      [character]         2. BC Co-Dom2                   1 ( 0.4%)                                 (100.0%)   (0.0%)    
##                          3. BC Dom1                      1 ( 0.4%)                                                      
##                          4. BC Dom2                      1 ( 0.4%)                                                      
##                          5. BC Low 1                     1 ( 0.4%)                                                      
##                          6. BC Low 2                     1 ( 0.4%)                                                      
##                          7. Bolt Co-Dom1                 1 ( 0.4%)                                                      
##                          8. Bolt Co-Dom2                 1 ( 0.4%)                                                      
##                          9. Bolt Dom1                    1 ( 0.4%)                                                      
##                          10. Bolt Dom2                   1 ( 0.4%)                                                      
##                          [ 245 others ]                245 (96.1%)           IIIIIIIIIIIIIIIIIII                        
## 
## 14   longitude           Mean (sd) : -73.7 (1)         255 distinct values       :                 255        0         
##      [numeric]           min < med < max:                                        :                 (100.0%)   (0.0%)    
##                          -75 < -74 < -72                                       : :   :   :                              
##                          IQR (CV) : 1.2 (0)                                    : :   :   :                              
##                                                                              : : : : :   : .                            
## 
## 15   total_moth_count    Mean (sd) : 57.6 (51.4)       113 distinct values   :                     252        3         
##      [integer]           min < med < max:                                    :                     (98.8%)    (1.2%)    
##                          0 < 47.5 < 378                                      : .                                        
##                          IQR (CV) : 55.5 (0.9)                               : :                                        
##                                                                              : : :                                      
## 
## 16   complete            Mean (sd) : 72.8 (60.9)       92 distinct values    :                     142        113       
##      [integer]           min < med < max:                                    : :                   (55.7%)    (44.3%)   
##                          3 < 61 < 378                                        : :                                        
##                          IQR (CV) : 68.8 (0.8)                               : : :                                      
##                                                                              : : : .   .                                
## 
## 17   clean_complete      Mean (sd) : 62.3 (55.4)       140 distinct values   :                     200        55        
##      [numeric]           min < med < max:                                    : .                   (78.4%)    (21.6%)   
##                          0 < 49.8 < 378                                      : :                                        
##                          IQR (CV) : 59 (0.9)                                 : :                                        
##                                                                              : : :                                      
## 
## 18   censored            Min  : 0                      0 : 200 (78.4%)       IIIIIIIIIIIIIII       255        0         
##      [integer]           Mean : 0.2                    1 :  55 (21.6%)       IIII                  (100.0%)   (0.0%)    
##                          Max  : 1                                                                                       
## 
## 19   reason_incomplete   1. (Empty string)             84 (60.4%)            IIIIIIIIIIII          139        116       
##      [character]         2. Missing in field            2 ( 1.4%)                                  (54.5%)    (45.5%)   
##                          3. Muck                       44 (31.7%)            IIIIII                                     
##                          4. Trap damaged·               9 ( 6.5%)            I                                          
## 
## 20   X                   All NA's                                                                  0          255       
##      [logical]                                                                                     (0.0%)     (100.0%)  
## ------------------------------------------------------------------------------------------------------------------------
str(complete_2023_2024)
## 'data.frame':    255 obs. of  20 variables:
##  $ patch_name       : chr  "Mont_Royal" "Mont_Royal" "Mont_Royal" "Mont_Royal" ...
##  $ Years            : int  2024 2024 2024 2024 2024 2024 2024 2024 2024 2024 ...
##  $ Year             : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ stand_type       : chr  "Oak" "Oak" "Oak" "Oak" ...
##  $ landscape_type   : chr  "urban" "urban" "urban" "urban" ...
##  $ stand_category   : chr  "E" "A" "C" "C" ...
##  $ Percent_Oak      : num  0.4 0.8 0.6 0.6 0.6 0.4 0.4 0.4 0.4 0.4 ...
##  $ Percent_Pine     : num  0 0 0 0 0 0 0 0 0.1 0.1 ...
##  $ Percent_Maple    : num  0.3 0.1 0.3 0.3 0.3 0.3 0.3 0.3 0 0 ...
##  $ Forest_Type      : chr  "oak_maple" "oak" "oak" "oak" ...
##  $ stand_area_ha    : num  14 6.4 8.6 8.6 8.6 11.7 11.7 11.7 8.9 8.9 ...
##  $ forest_area_km2  : num  4.89 4.89 4.89 4.89 4.89 4.89 4.89 4.89 4.89 4.89 ...
##  $ trap_name        : chr  "MRMOM" "MROak2.1" "MROak1.1" "MROak1.2" ...
##  $ longitude        : num  -73.6 -73.6 -73.6 -73.6 -73.6 ...
##  $ total_moth_count : int  93 15 7 20 6 96 52 NA 60 20 ...
##  $ complete         : int  93 15 7 20 6 96 52 NA 60 20 ...
##  $ clean_complete   : num  93 15 7 20 6 96 52 NA 60 20 ...
##  $ censored         : int  0 0 0 0 0 0 0 1 0 0 ...
##  $ reason_incomplete: chr  NA NA NA NA ...
##  $ X                : logi  NA NA NA NA NA NA ...
## change 'Year' from int to chr
complete_2023_2024$Year <- as.character(complete_2023_2024$Year)

str(complete_2023_2024)
## 'data.frame':    255 obs. of  20 variables:
##  $ patch_name       : chr  "Mont_Royal" "Mont_Royal" "Mont_Royal" "Mont_Royal" ...
##  $ Years            : int  2024 2024 2024 2024 2024 2024 2024 2024 2024 2024 ...
##  $ Year             : chr  "1" "1" "1" "1" ...
##  $ stand_type       : chr  "Oak" "Oak" "Oak" "Oak" ...
##  $ landscape_type   : chr  "urban" "urban" "urban" "urban" ...
##  $ stand_category   : chr  "E" "A" "C" "C" ...
##  $ Percent_Oak      : num  0.4 0.8 0.6 0.6 0.6 0.4 0.4 0.4 0.4 0.4 ...
##  $ Percent_Pine     : num  0 0 0 0 0 0 0 0 0.1 0.1 ...
##  $ Percent_Maple    : num  0.3 0.1 0.3 0.3 0.3 0.3 0.3 0.3 0 0 ...
##  $ Forest_Type      : chr  "oak_maple" "oak" "oak" "oak" ...
##  $ stand_area_ha    : num  14 6.4 8.6 8.6 8.6 11.7 11.7 11.7 8.9 8.9 ...
##  $ forest_area_km2  : num  4.89 4.89 4.89 4.89 4.89 4.89 4.89 4.89 4.89 4.89 ...
##  $ trap_name        : chr  "MRMOM" "MROak2.1" "MROak1.1" "MROak1.2" ...
##  $ longitude        : num  -73.6 -73.6 -73.6 -73.6 -73.6 ...
##  $ total_moth_count : int  93 15 7 20 6 96 52 NA 60 20 ...
##  $ complete         : int  93 15 7 20 6 96 52 NA 60 20 ...
##  $ clean_complete   : num  93 15 7 20 6 96 52 NA 60 20 ...
##  $ censored         : int  0 0 0 0 0 0 0 1 0 0 ...
##  $ reason_incomplete: chr  NA NA NA NA ...
##  $ X                : logi  NA NA NA NA NA NA ...
# remove space in trap_name
# replace '-' with '_' in trap_name
complete_2023_2024 <- complete_2023_2024 %>%
  mutate(trap_name = str_replace(trap_name, " ", ""))
complete_2023_2024 <- complete_2023_2024 %>%
  mutate(trap_name = str_replace(trap_name, "-", "_"))

#Remove MOM traps from either "stand type" or "stand category"
stand_type_filtered <- complete_2023_2024 %>%
  filter(stand_type != "MOM")
stand_category_filtered <- complete_2023_2024 %>%
  filter(stand_category != "MOM")



# looking for mistakes
unique(complete_2023_2024$stand_type)
## [1] "Oak"       "Oak/Pine"  "Pine"      "MOM"       "Pine/Oak"  "Oak/Other"
## [7] "Other"
unique(complete_2023_2024$patch_name)
##  [1] "Mont_Royal"               "Mont_Saint_Hilaire"      
##  [3] "Montebello"               "Notre_Dame_de_Bonsecours"
##  [5] "Oka"                      "Orford"                  
##  [7] "Papineauville"            "Rigaud"                  
##  [9] "Brownsburg"               "Hatley"                  
## [11] "Kenauk"                   "Mont_Gauvin/Glen "       
## [13] "Mont_Saint_Bruno"         "Parc_Michel_Chartrand"   
## [15] "Parc_Pointe_aux_Prairies"
unique(complete_2023_2024$Year)
## [1] "1" "0"
unique(complete_2023_2024$landscape_type)
## [1] "urban"        "agricultural" "forest "
unique(complete_2023_2024$stand_category)
##  [1] "E"   "A"   "C"   "L"   "MOM" "D"   "H"   "I"   "B"   "G"   "K"   "M"  
## [13] "F"   "J"   NA
unique(stand_type_filtered$stand_type)
## [1] "Oak"       "Oak/Pine"  "Pine"      "Pine/Oak"  "Oak/Other" "Other"
unique(stand_category_filtered$stand_category)
##  [1] "E" "A" "C" "L" "D" "H" "I" "B" "G" "K" "M" "F" "J"
# Data Summaries ----------------------------------------------------------

##separate Stand Type column so that we have a Stand ID for each stand in each patch
stand_ID_filtered <- stand_type_filtered %>% 
  separate(trap_name, into = c("stand_ID", "trap_number"), remove = FALSE, sep = "\\." ) %>% 
  glimpse()
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 103 rows [1, 144, 145,
## 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161,
## 162, ...].
## Rows: 245
## Columns: 22
## $ patch_name        <chr> "Mont_Royal", "Mont_Royal", "Mont_Royal", "Mont_Roya…
## $ Years             <int> 2024, 2024, 2024, 2024, 2024, 2024, 2024, 2024, 2024…
## $ Year              <chr> "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1…
## $ stand_type        <chr> "Oak", "Oak", "Oak", "Oak", "Oak", "Oak", "Oak", "Oa…
## $ landscape_type    <chr> "urban", "urban", "urban", "urban", "urban", "urban"…
## $ stand_category    <chr> "E", "A", "C", "C", "C", "E", "E", "E", "E", "E", "E…
## $ Percent_Oak       <dbl> 0.4, 0.8, 0.6, 0.6, 0.6, 0.4, 0.4, 0.4, 0.4, 0.4, 0.…
## $ Percent_Pine      <dbl> 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.1, 0.…
## $ Percent_Maple     <dbl> 0.3, 0.1, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.0, 0.0, 0.…
## $ Forest_Type       <chr> "oak_maple", "oak", "oak", "oak", "oak", "oak_maple"…
## $ stand_area_ha     <dbl> 14.0, 6.4, 8.6, 8.6, 8.6, 11.7, 11.7, 11.7, 8.9, 8.9…
## $ forest_area_km2   <dbl> 4.89, 4.89, 4.89, 4.89, 4.89, 4.89, 4.89, 4.89, 4.89…
## $ trap_name         <chr> "MRMOM", "MROak2.1", "MROak1.1", "MROak1.2", "MROak1…
## $ stand_ID          <chr> "MRMOM", "MROak2", "MROak1", "MROak1", "MROak1", "MR…
## $ trap_number       <chr> NA, "1", "1", "2", "3", "1", "2", "3", "1", "2", "3"…
## $ longitude         <dbl> -73.59218, -73.58859, -73.58863, -73.58738, -73.5892…
## $ total_moth_count  <int> 93, 15, 7, 20, 6, 96, 52, NA, 60, 20, 15, 17, 25, 3,…
## $ complete          <int> 93, 15, 7, 20, 6, 96, 52, NA, 60, 20, 15, 17, 25, 3,…
## $ clean_complete    <dbl> 93, 15, 7, 20, 6, 96, 52, NA, 60, 20, 15, 17, 25, 3,…
## $ censored          <int> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1…
## $ reason_incomplete <chr> NA, NA, NA, NA, NA, NA, NA, "Missing in field", NA, …
## $ X                 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
stand_ID_filtered_1 <- stand_category_filtered %>% 
  separate(trap_name, into = c("stand_ID", "trap_number"), remove = FALSE, sep = "\\." ) %>% 
  glimpse()
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 44 rows [1, 144, 145,
## 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161,
## 162, ...].
## Rows: 186
## Columns: 22
## $ patch_name        <chr> "Mont_Royal", "Mont_Royal", "Mont_Royal", "Mont_Roya…
## $ Years             <int> 2024, 2024, 2024, 2024, 2024, 2024, 2024, 2024, 2024…
## $ Year              <chr> "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1…
## $ stand_type        <chr> "Oak", "Oak", "Oak", "Oak", "Oak", "Oak", "Oak", "Oa…
## $ landscape_type    <chr> "urban", "urban", "urban", "urban", "urban", "urban"…
## $ stand_category    <chr> "E", "A", "C", "C", "C", "E", "E", "E", "E", "E", "E…
## $ Percent_Oak       <dbl> 0.4, 0.8, 0.6, 0.6, 0.6, 0.4, 0.4, 0.4, 0.4, 0.4, 0.…
## $ Percent_Pine      <dbl> 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.1, 0.…
## $ Percent_Maple     <dbl> 0.3, 0.1, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.0, 0.0, 0.…
## $ Forest_Type       <chr> "oak_maple", "oak", "oak", "oak", "oak", "oak_maple"…
## $ stand_area_ha     <dbl> 14.0, 6.4, 8.6, 8.6, 8.6, 11.7, 11.7, 11.7, 8.9, 8.9…
## $ forest_area_km2   <dbl> 4.89, 4.89, 4.89, 4.89, 4.89, 4.89, 4.89, 4.89, 4.89…
## $ trap_name         <chr> "MRMOM", "MROak2.1", "MROak1.1", "MROak1.2", "MROak1…
## $ stand_ID          <chr> "MRMOM", "MROak2", "MROak1", "MROak1", "MROak1", "MR…
## $ trap_number       <chr> NA, "1", "1", "2", "3", "1", "2", "3", "1", "2", "3"…
## $ longitude         <dbl> -73.59218, -73.58859, -73.58863, -73.58738, -73.5892…
## $ total_moth_count  <int> 93, 15, 7, 20, 6, 96, 52, NA, 60, 20, 15, 17, 25, 3,…
## $ complete          <int> 93, 15, 7, 20, 6, 96, 52, NA, 60, 20, 15, 17, 25, 3,…
## $ clean_complete    <dbl> 93, 15, 7, 20, 6, 96, 52, NA, 60, 20, 15, 17, 25, 3,…
## $ censored          <int> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1…
## $ reason_incomplete <chr> NA, NA, NA, NA, NA, NA, NA, "Missing in field", NA, …
## $ X                 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
total_moths_2023_2024 <- sum(complete_2023_2024$clean_complete, na.rm = TRUE)
print(total_moths_2023_2024)
## [1] 12456
n_distinct(complete_2023_2024$trap_name)
## [1] 255
n_distinct(stand_ID_filtered$stand_ID)
## [1] 155
n_distinct(stand_ID_filtered$patch_name)
## [1] 15
#check to see the distribution of moth count data
hist(complete_2023_2024$clean_complete, 
          main = " ", 
          xlab = "Spongy moth count/trap", 
          ylab = "Frequency", 
          col = "darkblue", 
          border = "black")

hist(complete_2023_2024$clean_complete, 
     main = " ", 
     xlab = "Spongy moth count/trap", 
     ylab = "Frequency", 
     col = "blue", 
     border = "black",
     breaks = 50)  # increase this number to make bins smaller

#Calculate the mean and standard deviation of moth counts for each 
#level of stand_category to see if there are differences.

# Checking unique combinations

table(stand_category_filtered$stand_category, 
      stand_category_filtered$patch_name)
##    
##     Brownsburg Hatley Kenauk Mont_Gauvin/Glen  Mont_Royal Mont_Saint_Bruno
##   A          0      0      1                 0          1                1
##   B          0      0      0                 0          0                0
##   C          2      2      1                 2          3                3
##   D          0      0      0                 0          0                0
##   E          0      0      0                 0          8                0
##   F          0      0      0                 0          0                0
##   G          0      0      0                 0          0                0
##   H          0      0      0                 0          0                0
##   I          0      0      0                 0          0                0
##   J          0      0      0                 0          0                0
##   K          0      0      0                 0          0                0
##   L          2      0      0                 0          1                0
##   M          0      0      0                 0          0                0
##    
##     Mont_Saint_Hilaire Montebello Notre_Dame_de_Bonsecours Oka Orford
##   A                  0          0                        0   0      3
##   B                  0          2                        0   0      0
##   C                  8          4                        6   0      2
##   D                  3          0                        0   0      0
##   E                  0          0                        0   8      6
##   F                  2          0                        5   0      5
##   G                  0          2                        0   1      0
##   H                  4          0                        1   8      0
##   I                  2          5                        0  18      0
##   J                  0          0                        0   0      4
##   K                  0          2                        0   5      0
##   L                  1          0                        0   2      7
##   M                  0          2                        2   0      2
##    
##     Papineauville Parc_Michel_Chartrand Rigaud
##   A             4                     0      0
##   B             0                     0      0
##   C             0                     2      0
##   D             0                     0      7
##   E             2                     0      0
##   F             0                     0      5
##   G             0                     0      0
##   H             2                     0      3
##   I             0                     0      4
##   J             0                     0      3
##   K             0                     0      3
##   L             0                     0      2
##   M             0                     0      0
table(stand_type_filtered$stand_type,
      stand_type_filtered$patch_name)
##            
##             Brownsburg Hatley Kenauk Mont_Gauvin/Glen  Mont_Royal
##   Oak                2      2      1                 2          8
##   Oak/Other          2      2      2                 2          0
##   Oak/Pine           0      0      1                 0          4
##   Other              0      2      2                 2          0
##   Pine               2      0      0                 0          1
##   Pine/Oak           0      0      0                 0          0
##            
##             Mont_Saint_Bruno Mont_Saint_Hilaire Montebello
##   Oak                      3                  8          6
##   Oak/Other                5                  3          2
##   Oak/Pine                 0                  8          2
##   Other                    4                  4          2
##   Pine                     0                  1          5
##   Pine/Oak                 0                  2          4
##            
##             Notre_Dame_de_Bonsecours Oka Orford Papineauville
##   Oak                              6   6     14             6
##   Oak/Other                        1   2      4             2
##   Oak/Pine                         5   9      2             2
##   Other                            2   0      4             2
##   Pine                             2   7     13             0
##   Pine/Oak                         1  18      0             0
##            
##             Parc_Michel_Chartrand Parc_Pointe_aux_Prairies Rigaud
##   Oak                           2                        0      7
##   Oak/Other                     2                        2      2
##   Oak/Pine                      0                        0      8
##   Other                         2                        4      0
##   Pine                          0                        0      8
##   Pine/Oak                      0                        0      4
# Set the levels of 'stand_type' to ensure the correct order
stand_type_filtered$stand_type <- factor(stand_type_filtered$stand_type, 
                              levels = c("Oak", "Oak/Pine", "Oak/Other", 
                                  "Pine/Oak", "Pine", "Other"))

# Create the 'stand-type by patch' table
table(stand_type_filtered$stand_type, stand_type_filtered$patch_name)
##            
##             Brownsburg Hatley Kenauk Mont_Gauvin/Glen  Mont_Royal
##   Oak                2      2      1                 2          8
##   Oak/Pine           0      0      1                 0          4
##   Oak/Other          2      2      2                 2          0
##   Pine/Oak           0      0      0                 0          0
##   Pine               2      0      0                 0          1
##   Other              0      2      2                 2          0
##            
##             Mont_Saint_Bruno Mont_Saint_Hilaire Montebello
##   Oak                      3                  8          6
##   Oak/Pine                 0                  8          2
##   Oak/Other                5                  3          2
##   Pine/Oak                 0                  2          4
##   Pine                     0                  1          5
##   Other                    4                  4          2
##            
##             Notre_Dame_de_Bonsecours Oka Orford Papineauville
##   Oak                              6   6     14             6
##   Oak/Pine                         5   9      2             2
##   Oak/Other                        1   2      4             2
##   Pine/Oak                         1  18      0             0
##   Pine                             2   7     13             0
##   Other                            2   0      4             2
##            
##             Parc_Michel_Chartrand Parc_Pointe_aux_Prairies Rigaud
##   Oak                           2                        0      7
##   Oak/Pine                      0                        0      8
##   Oak/Other                     2                        2      2
##   Pine/Oak                      0                        0      4
##   Pine                          0                        0      8
##   Other                         2                        4      0
# Create the contingency table
contingency_table <- table(stand_type_filtered$stand_type, stand_type_filtered$patch_name)

# Convert the table to a data frame
contingency_df <- as.data.frame(contingency_table)


# Heatmap stands by patch -------------------------------------------------

# Create a plot (heatmap) of the contingency table
contingency_df 
##         Var1                     Var2 Freq
## 1        Oak               Brownsburg    2
## 2   Oak/Pine               Brownsburg    0
## 3  Oak/Other               Brownsburg    2
## 4   Pine/Oak               Brownsburg    0
## 5       Pine               Brownsburg    2
## 6      Other               Brownsburg    0
## 7        Oak                   Hatley    2
## 8   Oak/Pine                   Hatley    0
## 9  Oak/Other                   Hatley    2
## 10  Pine/Oak                   Hatley    0
## 11      Pine                   Hatley    0
## 12     Other                   Hatley    2
## 13       Oak                   Kenauk    1
## 14  Oak/Pine                   Kenauk    1
## 15 Oak/Other                   Kenauk    2
## 16  Pine/Oak                   Kenauk    0
## 17      Pine                   Kenauk    0
## 18     Other                   Kenauk    2
## 19       Oak        Mont_Gauvin/Glen     2
## 20  Oak/Pine        Mont_Gauvin/Glen     0
## 21 Oak/Other        Mont_Gauvin/Glen     2
## 22  Pine/Oak        Mont_Gauvin/Glen     0
## 23      Pine        Mont_Gauvin/Glen     0
## 24     Other        Mont_Gauvin/Glen     2
## 25       Oak               Mont_Royal    8
## 26  Oak/Pine               Mont_Royal    4
## 27 Oak/Other               Mont_Royal    0
## 28  Pine/Oak               Mont_Royal    0
## 29      Pine               Mont_Royal    1
## 30     Other               Mont_Royal    0
## 31       Oak         Mont_Saint_Bruno    3
## 32  Oak/Pine         Mont_Saint_Bruno    0
## 33 Oak/Other         Mont_Saint_Bruno    5
## 34  Pine/Oak         Mont_Saint_Bruno    0
## 35      Pine         Mont_Saint_Bruno    0
## 36     Other         Mont_Saint_Bruno    4
## 37       Oak       Mont_Saint_Hilaire    8
## 38  Oak/Pine       Mont_Saint_Hilaire    8
## 39 Oak/Other       Mont_Saint_Hilaire    3
## 40  Pine/Oak       Mont_Saint_Hilaire    2
## 41      Pine       Mont_Saint_Hilaire    1
## 42     Other       Mont_Saint_Hilaire    4
## 43       Oak               Montebello    6
## 44  Oak/Pine               Montebello    2
## 45 Oak/Other               Montebello    2
## 46  Pine/Oak               Montebello    4
## 47      Pine               Montebello    5
## 48     Other               Montebello    2
## 49       Oak Notre_Dame_de_Bonsecours    6
## 50  Oak/Pine Notre_Dame_de_Bonsecours    5
## 51 Oak/Other Notre_Dame_de_Bonsecours    1
## 52  Pine/Oak Notre_Dame_de_Bonsecours    1
## 53      Pine Notre_Dame_de_Bonsecours    2
## 54     Other Notre_Dame_de_Bonsecours    2
## 55       Oak                      Oka    6
## 56  Oak/Pine                      Oka    9
## 57 Oak/Other                      Oka    2
## 58  Pine/Oak                      Oka   18
## 59      Pine                      Oka    7
## 60     Other                      Oka    0
## 61       Oak                   Orford   14
## 62  Oak/Pine                   Orford    2
## 63 Oak/Other                   Orford    4
## 64  Pine/Oak                   Orford    0
## 65      Pine                   Orford   13
## 66     Other                   Orford    4
## 67       Oak            Papineauville    6
## 68  Oak/Pine            Papineauville    2
## 69 Oak/Other            Papineauville    2
## 70  Pine/Oak            Papineauville    0
## 71      Pine            Papineauville    0
## 72     Other            Papineauville    2
## 73       Oak    Parc_Michel_Chartrand    2
## 74  Oak/Pine    Parc_Michel_Chartrand    0
## 75 Oak/Other    Parc_Michel_Chartrand    2
## 76  Pine/Oak    Parc_Michel_Chartrand    0
## 77      Pine    Parc_Michel_Chartrand    0
## 78     Other    Parc_Michel_Chartrand    2
## 79       Oak Parc_Pointe_aux_Prairies    0
## 80  Oak/Pine Parc_Pointe_aux_Prairies    0
## 81 Oak/Other Parc_Pointe_aux_Prairies    2
## 82  Pine/Oak Parc_Pointe_aux_Prairies    0
## 83      Pine Parc_Pointe_aux_Prairies    0
## 84     Other Parc_Pointe_aux_Prairies    4
## 85       Oak                   Rigaud    7
## 86  Oak/Pine                   Rigaud    8
## 87 Oak/Other                   Rigaud    2
## 88  Pine/Oak                   Rigaud    4
## 89      Pine                   Rigaud    8
## 90     Other                   Rigaud    0
ggplot(contingency_df, aes(x = Var1, y = Var2, fill = Freq)) +
  geom_tile() +
  scale_fill_gradient(low = "white", high = "darkred") +
  labs(x = "Stand Type", y = "Patch Name", fill = "Frequency") +
  theme_minimal()

# Save the plot as an image (e.g., PNG)
#ggsave("contingency_table_heatmap.png", width = 7.5, height = 6)

#convert the data in the heatmap plot to a table format
wide_table <- contingency_df %>%
  pivot_wider(names_from = Var1, values_from = Freq, values_fill = 0)

wide_table <- wide_table %>%
  mutate(
    Var2 = trimws(as.character(Var2)),
    Var2 = dplyr::recode(Var2,
                          "Mont_Gauvin/Glen" = "Mont Gauvin",
                          "Mont_Royal" = "Mont Royal",
                          "Mont_Saint_Bruno" = "Mont Saint Bruno",
                          "Mont_Saint_Hilaire" = "Mont Saint Hilaire",
                          "Notre_Dame_de_Bonsecours" = "Notre Dame de Bonsecours",
                          "Oka" = "Parc Oka",
                          "Orford" = "Mont Orford",
                          "Parc_Michel_Chartrand" = "Parc Michel Chartrand",
                          "Parc_Pointe_aux_Prairies" = "Parc Pointe aux Prairies",
                          "Rigaud" = "Mont Rigaud",
                         ))


##print(wide_table)

patch_order <- c("Papineauville", "Montebello", "Notre Dame de Bonsecours",
                 "Kenauk", "Brownsburg", "Mont Rigaud", "Parc Oka",
                 "Mont Royal",
                 "Parc Pointe aux Prairies", "Parc Michel Chartrand",
                 "Mont Saint Bruno", "Mont Saint Hilaire","Mont Gauvin",
                 "Mont Orford", "Hatley")
wide_table <- wide_table %>%
  mutate(Var2 = factor(Var2, levels = patch_order)) %>%
  arrange(Var2)

##print(wide_table)

gt_table <- wide_table %>%
  gt() %>%
  cols_label(
    Var2 = "Patch Name"
  ) %>%
  tab_options(
    table.font.size = 12,
    heading.title.font.size = 16,
    heading.subtitle.font.size = 14
  ) %>%
  cols_align(
    align = "center",
    columns = everything()
  ) %>%
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_column_labels(everything())
  ) %>%
  tab_style(  # Custom striping on even-numbered rows
    style = cell_fill(color = "#f9f9f0"),  # You can change this color
    locations = cells_body(rows = seq(2, nrow(wide_table), 2))
  )

##gtsave(gt_table, "heatmap_table.png")      # Image
##gtsave(gt_table, "heatmap_table.html")     # Web preview

version$version.string
## [1] "R version 4.4.3 (2025-02-28)"
# Summary statistics for moth count by stand_category and stand_type
summary_stats <- stand_type_filtered %>%
  group_by(stand_category, stand_type, patch_name) %>%
  summarise(
    mean_count = mean(total_moth_count, na.rm = TRUE),
    sd_count = sd(total_moth_count, na.rm = TRUE),
    var_count = var(total_moth_count, na.rm = TRUE),
    count = n()
  )
## `summarise()` has grouped output by 'stand_category', 'stand_type'. You can
## override using the `.groups` argument.
##print(summary_stats, n=22)




# Table to use -----------------------------------------------------------

# Summary statistics for moth count by stand_type, differentiating between
##traps set and traps with usable data
summary_stats_3 <- stand_type_filtered %>%
  group_by(stand_type) %>%
  summarise(
    mean_count = mean(clean_complete, na.rm = TRUE),
    sd_count = sd(clean_complete, na.rm = TRUE),
    n_traps = n(),
    n_obs = sum(!is.na (clean_complete))
  )

print(summary_stats_3, n=22)
## # A tibble: 6 × 5
##   stand_type mean_count sd_count n_traps n_obs
##   <fct>           <dbl>    <dbl>   <int> <int>
## 1 Oak              77.0     69.8      73    57
## 2 Oak/Pine         64.4     52.6      41    30
## 3 Oak/Other        37.3     27.0      33    29
## 4 Pine/Oak         85.8     61.1      29    20
## 5 Pine             54.8     33.2      39    27
## 6 Other            36.9     26.8      30    27
# Round numeric columns to 2 decimal places
summary_stats_3$mean_count <- round(summary_stats_3$mean_count, 2)
summary_stats_3$sd_count <- round(summary_stats_3$sd_count, 2)
summary_stats_3$count <- round(summary_stats_3$n_obs, 2)



# Summary statistics for moth count by patch
summary_stats_4 <- stand_type_filtered %>%
  group_by(patch_name) %>%
  summarise(
    mean_count = mean(clean_complete, na.rm = TRUE),
    sd_count = sd(clean_complete, na.rm = TRUE),
    count = n()
  )

print(summary_stats_4, n=22)
## # A tibble: 15 × 4
##    patch_name                 mean_count sd_count count
##    <chr>                           <dbl>    <dbl> <int>
##  1 "Brownsburg"                     38.6     9.40     6
##  2 "Hatley"                         74.8    18.4      6
##  3 "Kenauk"                         26.8    20.4      6
##  4 "Mont_Gauvin/Glen "              34.1    11.6      6
##  5 "Mont_Royal"                     35.5    32.0     13
##  6 "Mont_Saint_Bruno"               24.7    13.9     12
##  7 "Mont_Saint_Hilaire"             23.5    29.6     26
##  8 "Montebello"                     54      44.8     21
##  9 "Notre_Dame_de_Bonsecours"       54.8    32.9     17
## 10 "Oka"                           105.     63.6     42
## 11 "Orford"                         88.0    67.9     37
## 12 "Papineauville"                  71.0    56.2     12
## 13 "Parc_Michel_Chartrand"          70.2    17.4      6
## 14 "Parc_Pointe_aux_Prairies"       43.3    32.7      6
## 15 "Rigaud"                         38.1    26.4     29
## Remove MOM row in 'complete' moth counts
moth_by_stand_summary_stats <- summary_stats_3[-c(1),]
print(moth_by_stand_summary_stats, n=22)
## # A tibble: 5 × 6
##   stand_type mean_count sd_count n_traps n_obs count
##   <fct>           <dbl>    <dbl>   <int> <int> <dbl>
## 1 Oak/Pine         64.4     52.6      41    30    30
## 2 Oak/Other        37.3     27.0      33    29    29
## 3 Pine/Oak         85.8     61.1      29    20    20
## 4 Pine             54.8     33.2      39    27    27
## 5 Other            36.9     26.8      30    27    27
## Remove MOM row in 'clean_complete' moth counts
moth_by_stand_summary_stats_2 <- summary_stats_4[-c(1),]
print(moth_by_stand_summary_stats_2, n=22)
## # A tibble: 14 × 4
##    patch_name                 mean_count sd_count count
##    <chr>                           <dbl>    <dbl> <int>
##  1 "Hatley"                         74.8     18.4     6
##  2 "Kenauk"                         26.8     20.4     6
##  3 "Mont_Gauvin/Glen "              34.1     11.6     6
##  4 "Mont_Royal"                     35.5     32.0    13
##  5 "Mont_Saint_Bruno"               24.7     13.9    12
##  6 "Mont_Saint_Hilaire"             23.5     29.6    26
##  7 "Montebello"                     54       44.8    21
##  8 "Notre_Dame_de_Bonsecours"       54.8     32.9    17
##  9 "Oka"                           105.      63.6    42
## 10 "Orford"                         88.0     67.9    37
## 11 "Papineauville"                  71.0     56.2    12
## 12 "Parc_Michel_Chartrand"          70.2     17.4     6
## 13 "Parc_Pointe_aux_Prairies"       43.3     32.7     6
## 14 "Rigaud"                         38.1     26.4    29
###NEED TO SAVE AND EXPORT THESE SUMMARY TABLES###


# Visualizations ----------------------------------------------------------

ggplot(stand_ID_filtered, aes(x = Percent_Oak, y = clean_complete)) +
  geom_point(alpha = 0.5) +
  stat_smooth(method = "glm", method.args = list(family = "poisson"), 
              se = TRUE, color = "darkgreen") +
  labs(
    title = "Effect of Percent Oak on Moth Counts",
    x = "% Oak Cover",
    y = "Moth Count"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 55 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning in dpois(y, mu, log = TRUE): non-integer x = 28.250000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 39.250000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 33.750000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 53.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 91.250000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 71.250000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 77.250000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 5.250000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 16.750000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 46.250000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 25.250000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 15.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 35.250000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 40.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 18.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 8.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 6.250000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 42.750000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 18.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 27.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 6.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 6.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 26.750000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 12.250000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 27.250000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 43.250000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 86.250000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 29.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 68.250000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 92.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 80.750000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 57.750000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 81.750000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 59.250000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 50.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 54.250000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 55.250000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 62.750000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 34.250000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 50.750000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 18.250000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 32.250000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 77.250000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 49.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 79.250000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 97.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 64.250000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 40.250000
## Warning: Removed 55 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Negative Binomial models ------------------------------------------------

#checking compatibility of glmmTMB and lme4 packages
packageVersion("glmmTMB")
## [1] '1.1.13'
packageVersion("lme4")
## [1] '1.1.37'
packageVersion("Matrix")
## [1] '1.7.4'
packageVersion("TMB")
## [1] '1.9.18'
packageVersion("mgcv")
## [1] '1.9.1'
sapply(c("Matrix", "TMB", "lme4", "glmmTMB"), find.package)
##                                                                         Matrix 
##  "/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/Matrix" 
##                                                                            TMB 
##     "/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/TMB" 
##                                                                           lme4 
##    "/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/lme4" 
##                                                                        glmmTMB 
## "/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/glmmTMB"
#need to convert to 'factor' for negative binomial
stand_ID_filtered$patch_name <- as.factor(stand_ID_filtered$patch_name)
stand_ID_filtered$stand_ID <- as.factor(stand_ID_filtered$stand_ID)

library(mgcv)
## Loading required package: nlme
## 
## Attaching package: 'nlme'
## 
## The following object is masked from 'package:lme4':
## 
##     lmList
## 
## The following object is masked from 'package:dplyr':
## 
##     collapse
## 
## This is mgcv 1.9-1. For overview type 'help("mgcv-package")'.
## 
## Attaching package: 'mgcv'
## 
## The following objects are masked from 'package:brms':
## 
##     s, t2
##NB with Oak

Oak_nb_model <- gam(round(clean_complete) ~ Percent_Oak + 
                   s(patch_name, bs = "re") +  # random effect for patch_name
                   s(stand_ID, bs = "re"),     # random effect for stand_ID
                 family = nb(),                # negative binomial
                 method = "REML", 
                 data = stand_ID_filtered)
summary(Oak_nb_model)
## 
## Family: Negative Binomial(2.488) 
## Link function: log 
## 
## Formula:
## round(clean_complete) ~ Percent_Oak + s(patch_name, bs = "re") + 
##     s(stand_ID, bs = "re")
## 
## Parametric coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   3.5213     0.1636  21.524  < 2e-16 ***
## Percent_Oak   0.6857     0.2653   2.585  0.00975 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                 edf Ref.df Chi.sq p-value    
## s(patch_name) 10.46     14 185.75 < 2e-16 ***
## s(stand_ID)   35.46    129  65.87 0.00472 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.456   Deviance explained = 56.1%
## -REML =  942.8  Scale est. = 1         n = 190
plot(Oak_nb_model, pages = 1)

# Fitted values
fitted_vals_oak <- fitted(Oak_nb_model)

# Pearson residuals
pearson_resid_oak <- residuals(Oak_nb_model, type = "pearson")

# Residual degrees of freedom
rdf_oak <- df.residual(Oak_nb_model)

plot(fitted_vals_oak, pearson_resid_oak, 
     xlab="Fitted values", ylab="Pearson residuals")
abline(h=0, col="red")

# Dispersion ratio
dispersion_oak <- sum(pearson_resid_oak^2) / rdf_oak
dispersion_oak
## [1] 0.8515649
#dispersion = 0.852, indicating that there is no over (or much under) dispersion

performance::check_model(Oak_nb_model, residual_type = "normal")

##NB with Pine

Pine_nb_model <- gam(round(clean_complete) ~ Percent_Pine + 
                      s(patch_name, bs = "re") +  # random effect for patch_name
                      s(stand_ID, bs = "re"),     # random effect for stand_ID
                    family = nb(),                # negative binomial
                    method = "REML", 
                    data = stand_ID_filtered)
summary(Pine_nb_model)
## 
## Family: Negative Binomial(2.518) 
## Link function: log 
## 
## Formula:
## round(clean_complete) ~ Percent_Pine + s(patch_name, bs = "re") + 
##     s(stand_ID, bs = "re")
## 
## Parametric coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    3.7206     0.1376  27.036   <2e-16 ***
## Percent_Pine   0.2016     0.3034   0.664    0.506    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                 edf Ref.df Chi.sq p-value    
## s(patch_name) 10.06     14 161.96 < 2e-16 ***
## s(stand_ID)   41.21    129  82.84 0.00112 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.448   Deviance explained = 57.9%
## -REML = 945.55  Scale est. = 1         n = 190
plot(Pine_nb_model, pages = 1)

# Fitted values
fitted_vals_pine <- fitted(Pine_nb_model)

# Pearson residuals
pearson_resid_pine <- residuals(Pine_nb_model, type = "pearson")

# Residual degrees of freedom
rdf_pine <- df.residual(Pine_nb_model)

plot(fitted_vals_pine, pearson_resid_pine, 
     xlab="Fitted values", ylab="Pearson residuals")
abline(h=0, col="red")

# Dispersion ratio
dispersion_pine <- sum(pearson_resid_pine^2) / rdf_pine
dispersion_pine
## [1] 0.8448824
#dispersion = 0.845, indicating that there is no over (or much under) dispersion

performance::check_model(Pine_nb_model, residual_type = "normal")

##NB with both Oak and Pine

Both_nb_model <- gam(round(clean_complete) ~ Percent_Oak + Percent_Pine + 
                       Percent_Oak*Percent_Pine +
                       s(patch_name, bs = "re") +  # random effect for patch_name
                       s(stand_ID, bs = "re"),     # random effect for stand_ID
                     family = nb(),                # negative binomial
                     method = "REML", 
                     data = stand_ID_filtered)
summary(Both_nb_model)
## 
## Family: Negative Binomial(2.663) 
## Link function: log 
## 
## Formula:
## round(clean_complete) ~ Percent_Oak + Percent_Pine + Percent_Oak * 
##     Percent_Pine + s(patch_name, bs = "re") + s(stand_ID, bs = "re")
## 
## Parametric coefficients:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                3.2773     0.1881  17.419  < 2e-16 ***
## Percent_Oak                0.9823     0.3214   3.057  0.00224 ** 
## Percent_Pine               0.6611     0.3769   1.754  0.07939 .  
## Percent_Oak:Percent_Pine   1.7512     1.8331   0.955  0.33941    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                 edf Ref.df Chi.sq  p-value    
## s(patch_name) 10.21     14 166.49  < 2e-16 ***
## s(stand_ID)   40.02    127  79.11 3.91e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.461   Deviance explained = 60.3%
## -REML = 938.48  Scale est. = 1         n = 190
plot(Both_nb_model, pages = 1)

# Fitted values
fitted_vals_both <- fitted(Both_nb_model)

# Pearson residuals
pearson_resid_both <- residuals(Both_nb_model, type = "pearson")

# Residual degrees of freedom
rdf_both <- df.residual(Both_nb_model)

plot(fitted_vals_both, pearson_resid_both, 
     xlab="Fitted values", ylab="Pearson residuals")
abline(h=0, col="red")

# Dispersion ratio
dispersion_both <- sum(pearson_resid_both^2) / rdf_both
dispersion_both
## [1] 0.8628572
#dispersion = 0.852, indicating that there is no over (or much under) dispersion

performance::check_model(Both_nb_model, residual_type = "normal")

#Adding a variable looking at year 1 and 2 

Year_nb_model <- gam(round(clean_complete) ~ 
                       Year +
                       s(patch_name, bs = "re") +  # random effect for patch_name
                       s(stand_ID, bs = "re"),     # random effect for stand_ID
                     family = nb(),                # negative binomial
                     method = "REML", 
                     data = stand_ID_filtered)
summary(Year_nb_model)
## 
## Family: Negative Binomial(2.594) 
## Link function: log 
## 
## Formula:
## round(clean_complete) ~ Year + s(patch_name, bs = "re") + s(stand_ID, 
##     bs = "re")
## 
## Parametric coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   3.4825     0.1475   23.61  < 2e-16 ***
## Year1         0.7215     0.1366    5.28 1.29e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                 edf Ref.df Chi.sq p-value    
## s(patch_name) 10.93     14 149.03  <2e-16 ***
## s(stand_ID)   26.25    129  39.73  0.0237 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.479   Deviance explained = 55.5%
## -REML = 934.83  Scale est. = 1         n = 190
plot(Year_nb_model, pages = 1)

# Fitted values
fitted_vals_year <- fitted(Year_nb_model)

# Pearson residuals
pearson_resid_year <- residuals(Year_nb_model, type = "pearson")

# Residual degrees of freedom
rdf_year <- df.residual(Year_nb_model)

plot(fitted_vals_year, pearson_resid_year, 
     xlab="Fitted values", ylab="Pearson residuals")
abline(h=0, col="red")

# Dispersion ratio
dispersion_year <- sum(pearson_resid_year^2) / rdf_year
dispersion_year
## [1] 0.819819
#dispersion = 0.820, indicating that there is no over (or much under) dispersion

performance::check_model(Year_nb_model, residual_type = "normal")

# All Variables -----------------------------------------------------------

##Fitting a gam model with all of the possible response variables, to 
#explore a correlation between all possible variables and moth counts

all_variable_nb <- gam(round(clean_complete) ~ Percent_Oak + Percent_Pine + 
                         + landscape_type + longitude + 
                         forest_area_km2 + stand_area_ha +
                         s(patch_name, bs = "re") +  # random effect for patch_name
                         s(stand_ID, bs = "re"),     # random effect for stand_ID
                       family = nb(),                # negative binomial
                       method = "REML", 
                       data = stand_ID_filtered)
summary(all_variable_nb)
## 
## Family: Negative Binomial(2.785) 
## Link function: log 
## 
## Formula:
## round(clean_complete) ~ Percent_Oak + Percent_Pine + +landscape_type + 
##     longitude + forest_area_km2 + stand_area_ha + s(patch_name, 
##     bs = "re") + s(stand_ID, bs = "re")
## 
## Parametric coefficients:
##                         Estimate Std. Error z value Pr(>|z|)   
## (Intercept)            6.8753148 14.0950754   0.488   0.6257   
## Percent_Oak            0.9746785  0.3210116   3.036   0.0024 **
## Percent_Pine           0.6667394  0.3631498   1.836   0.0664 . 
## landscape_typeforest   0.5757522  0.3651147   1.577   0.1148   
## landscape_typeurban   -0.1991551  0.3834729  -0.519   0.6035   
## longitude              0.0472188  0.1917962   0.246   0.8055   
## forest_area_km2       -0.0004520  0.0003947  -1.145   0.2522   
## stand_area_ha         -0.0144073  0.0207391  -0.695   0.4872   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                  edf Ref.df Chi.sq  p-value    
## s(patch_name)  8.931     14 172.76  < 2e-16 ***
## s(stand_ID)   41.467    124  77.11 3.57e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.471   Deviance explained = 62.9%
## -REML = 949.47  Scale est. = 1         n = 190
plot(all_variable_nb, pages = 1)

# Fitted values
fitted_vals_all <- fitted(all_variable_nb)

# Pearson residuals
pearson_resid_all <- residuals(all_variable_nb, type = "pearson")

# Residual degrees of freedom
rdf_all <- df.residual(all_variable_nb)

plot(fitted_vals_all, pearson_resid_all, 
     xlab="Fitted values", ylab="Pearson residuals")
abline(h=0, col="red")

# Dispersion ratio
dispersion_all <- sum(pearson_resid_all^2) / rdf_all
dispersion_all
## [1] 0.893226
#dispersion = 0.893, indicating that there is no over (or much under) dispersion

performance::check_model(all_variable_nb, residual_type = "normal")

# Print a clean table to the console or a markdown/HTML-friendly output
# Save directly to a file
tab_model(all_variable_nb,
          show.stat = TRUE,
          p.style = "numeric",
          file = "model_summary.doc")   # can be .doc, .html, .htm
  round(clean complete)
Predictors Incidence Rate Ratios CI Statistic p
(Intercept) 968.08 0.00 – 963077404573204.62 0.49 0.626
Percent Oak 2.65 1.41 – 4.97 3.04 0.002
Percent Pine 1.95 0.96 – 3.97 1.84 0.066
landscape typeforest 1.78 0.87 – 3.64 1.58 0.115
landscape type [urban] 0.82 0.39 – 1.74 -0.52 0.604
longitude 1.05 0.72 – 1.53 0.25 0.806
forest area km2 1.00 1.00 – 1.00 -1.15 0.252
stand area ha 0.99 0.95 – 1.03 -0.69 0.487
Smooth term (patch name) 172.76 <0.001
Smooth term (stand ID) 77.11 <0.001
Observations 190
R2 0.471
#checking for collinearity between variables

#VIF = 1 indicates no multicollinearity
collinearity_all <- check_collinearity(all_variable_nb)
print(collinearity_all)
## # Check for Multicollinearity
## 
## Low Correlation
## 
##           Term  VIF     VIF 95% CI adj. VIF Tolerance Tolerance 95% CI
##    Percent_Oak 2.69 [ 2.20,  3.38]     1.64      0.37     [0.30, 0.46]
##   Percent_Pine 3.73 [ 3.00,  4.75]     1.93      0.27     [0.21, 0.33]
##  stand_area_ha 2.88 [ 2.34,  3.63]     1.70      0.35     [0.28, 0.43]
## 
## High Correlation
## 
##             Term   VIF     VIF 95% CI adj. VIF Tolerance Tolerance 95% CI
##        longitude 16.52 [12.80, 21.42]     4.06      0.06     [0.05, 0.08]
##  forest_area_km2 11.25 [ 8.76, 14.55]     3.35      0.09     [0.07, 0.11]
# checking for co-variation between numeric predictors
#in a Pearson correlation matrix
numeric_vars <- stand_ID_filtered[, c("Percent_Oak", "Percent_Pine", "longitude", 
                                      "forest_area_km2", "stand_area_ha")]
# Correlation matrix
cor(numeric_vars, use = "complete.obs")
##                 Percent_Oak Percent_Pine   longitude forest_area_km2
## Percent_Oak      1.00000000  -0.52500780 -0.15613141     -0.01062395
## Percent_Pine    -0.52500780   1.00000000 -0.11765413      0.05010221
## longitude       -0.15613141  -0.11765413  1.00000000     -0.41050141
## forest_area_km2 -0.01062395   0.05010221 -0.41050141      1.00000000
## stand_area_ha   -0.03911951   0.09618211  0.03658612     -0.24775945
##                 stand_area_ha
## Percent_Oak       -0.03911951
## Percent_Pine       0.09618211
## longitude          0.03658612
## forest_area_km2   -0.24775945
## stand_area_ha      1.00000000
library(gt)

cor_mat <- cor(numeric_vars, use = "complete.obs")
cor_df <- as.data.frame(round(cor_mat, 3))
cor_df |> gt::gt() |> gt::tab_caption("Correlation Matrix")
Correlation Matrix
Percent_Oak Percent_Pine longitude forest_area_km2 stand_area_ha
1.000 -0.525 -0.156 -0.011 -0.039
-0.525 1.000 -0.118 0.050 0.096
-0.156 -0.118 1.000 -0.411 0.037
-0.011 0.050 -0.411 1.000 -0.248
-0.039 0.096 0.037 -0.248 1.000
ggplot(stand_ID_filtered, aes(x = Percent_Oak, y = clean_complete, color = landscape_type)) +
  geom_point(alpha = 0.6) +  # Add raw data points with transparency
  geom_smooth(method = "lm", se = FALSE) +  # Add regression lines by group
  theme_minimal() +
  labs(
    title = " ",
    x = "Percent Oak",
    y = "Moth Counts",
    color = "Landscape Type"
  ) +
  scale_color_viridis_d(option = "D")  # You can also try options "A", "B", "C", "E"
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 55 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Removed 55 rows containing missing values or values outside the scale range
## (`geom_point()`).